{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "进行模型融合\n",
    "\n",
    "    \n",
    "\"\"\"\n",
    "\n",
    "from __future__ import division\n",
    "import numpy as np\n",
    "import pandas as pd \n",
    "from sklearn.model_selection import StratifiedKFold\n",
    "from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier\n",
    "from sklearn.ensemble import GradientBoostingClassifier\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from xgboost import XGBClassifier\n",
    "import xgboost as xgb\n",
    "import lightgbm as lgb\n",
    "from lightgbm.sklearn import LGBMClassifier\n",
    "from xgboost import XGBClassifier\n",
    "import xgboost as xgb\n",
    "from sklearn.svm import LinearSVC\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.preprocessing import OneHotEncoder\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "from sklearn.metrics import log_loss  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "##==================== 文件路径 ====================##\n",
    "\n",
    "fp_train_f = \"feature_engineering/train_f.csv\"\n",
    "fp_test_f  = \"feature_engineering/test_f.csv\"\n",
    "## 子训练集\n",
    "#fp_sub_train_f = \"feature_engineering/sub_train_f.csv\"\n",
    "#fp_sub_test_f = \"feature_engineering/sub_test_f.csv\"\n",
    "\n",
    "## label encoder for gbdt input\n",
    "fp_lb_enc = \"feature_engineering/lb_enc\"\n",
    "\n",
    "## one-hot encoder for gbdt output\n",
    "fp_oh_enc_gbdt = \"gbdt/oh_enc_gbdt\"\n",
    "\n",
    "## 预训练模型的存储\n",
    "\n",
    "fp_gbdt_model = \"gbdt/gbdt_model\"\n",
    "fp_lr_model = \"lr/lr_model\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [],
   "source": [
    "train = pd.read_csv(train_csv) \n",
    "test = pd.read_csv(test_x_csv)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>click</th>\n",
       "      <th>C1</th>\n",
       "      <th>banner_pos</th>\n",
       "      <th>site_domain</th>\n",
       "      <th>site_id</th>\n",
       "      <th>site_category</th>\n",
       "      <th>app_id</th>\n",
       "      <th>app_category</th>\n",
       "      <th>device_type</th>\n",
       "      <th>device_conn_type</th>\n",
       "      <th>C14</th>\n",
       "      <th>date</th>\n",
       "      <th>time_period</th>\n",
       "      <th>weekday</th>\n",
       "      <th>C15_C16</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.000009e+18</td>\n",
       "      <td>0</td>\n",
       "      <td>1005</td>\n",
       "      <td>0</td>\n",
       "      <td>f3845767</td>\n",
       "      <td>1fbe01fe</td>\n",
       "      <td>28905ebd</td>\n",
       "      <td>ecad2386</td>\n",
       "      <td>07d7df22</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>15706</td>\n",
       "      <td>21</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>270</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.000017e+19</td>\n",
       "      <td>0</td>\n",
       "      <td>1005</td>\n",
       "      <td>0</td>\n",
       "      <td>f3845767</td>\n",
       "      <td>1fbe01fe</td>\n",
       "      <td>28905ebd</td>\n",
       "      <td>ecad2386</td>\n",
       "      <td>07d7df22</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>15704</td>\n",
       "      <td>21</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>270</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.000037e+19</td>\n",
       "      <td>0</td>\n",
       "      <td>1005</td>\n",
       "      <td>0</td>\n",
       "      <td>f3845767</td>\n",
       "      <td>1fbe01fe</td>\n",
       "      <td>28905ebd</td>\n",
       "      <td>ecad2386</td>\n",
       "      <td>07d7df22</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>15704</td>\n",
       "      <td>21</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>270</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.000064e+19</td>\n",
       "      <td>0</td>\n",
       "      <td>1005</td>\n",
       "      <td>0</td>\n",
       "      <td>f3845767</td>\n",
       "      <td>1fbe01fe</td>\n",
       "      <td>28905ebd</td>\n",
       "      <td>ecad2386</td>\n",
       "      <td>07d7df22</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>15706</td>\n",
       "      <td>21</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>270</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1.000068e+19</td>\n",
       "      <td>0</td>\n",
       "      <td>1005</td>\n",
       "      <td>1</td>\n",
       "      <td>9166c161</td>\n",
       "      <td>fe8cc448</td>\n",
       "      <td>0569f928</td>\n",
       "      <td>ecad2386</td>\n",
       "      <td>07d7df22</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>18993</td>\n",
       "      <td>21</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>270</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             id  click    C1  banner_pos site_domain   site_id site_category  \\\n",
       "0  1.000009e+18      0  1005           0    f3845767  1fbe01fe      28905ebd   \n",
       "1  1.000017e+19      0  1005           0    f3845767  1fbe01fe      28905ebd   \n",
       "2  1.000037e+19      0  1005           0    f3845767  1fbe01fe      28905ebd   \n",
       "3  1.000064e+19      0  1005           0    f3845767  1fbe01fe      28905ebd   \n",
       "4  1.000068e+19      0  1005           1    9166c161  fe8cc448      0569f928   \n",
       "\n",
       "     app_id app_category  device_type  device_conn_type    C14  date  \\\n",
       "0  ecad2386     07d7df22            1                 2  15706    21   \n",
       "1  ecad2386     07d7df22            1                 0  15704    21   \n",
       "2  ecad2386     07d7df22            1                 0  15704    21   \n",
       "3  ecad2386     07d7df22            1                 0  15706    21   \n",
       "4  ecad2386     07d7df22            1                 0  18993    21   \n",
       "\n",
       "   time_period  weekday  C15_C16  \n",
       "0            0        1      270  \n",
       "1            0        1      270  \n",
       "2            0        1      270  \n",
       "3            0        1      270  \n",
       "4            0        1      270  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_id = test.id"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = train.drop(['click', 'id'],axis=1)\n",
    "y = pd.DataFrame(train['click'])\n",
    "X_submission = test.drop(['id'],axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# LabelEncoder\n",
    "for column in X.columns:\n",
    "    lb_enc = LabelEncoder()\n",
    "    lb_enc.fit(X[column].values)\n",
    "    X[column] = lb_enc.transform(X[column].values)\n",
    "    \n",
    "for column in X_submission.columns:\n",
    "    lb_enc = LabelEncoder()\n",
    "    lb_enc.fit(X_submission[column].values)\n",
    "    X_submission[column] = lb_enc.transform(X_submission[column].values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 244,
   "metadata": {},
   "outputs": [],
   "source": [
    "# one hot\n",
    "from dummyPy import OneHotEncoder\n",
    "cols = []\n",
    "for col in X.columns:\n",
    "    cols.append(col)\n",
    "oh_enc = OneHotEncoder(cols)\n",
    "oh_enc.fit(X)\n",
    "oh_enc.fit(X_submission)\n",
    "X = oh_enc.transform(X)\n",
    "X_submission = oh_enc.transform(X_submission)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(0)  # seed to shuffle the train set\n",
    "\n",
    "n_folds = 10\n",
    "verbose = True\n",
    "shuffle = False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "if shuffle:\n",
    "        idx = np.random.permutation(y.size)\n",
    "        X = X[idx]\n",
    "        y = y[idx]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "sfolder = StratifiedKFold(n_splits=5,random_state=0,shuffle=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "bst = RandomForestClassifier(n_estimators=100, n_jobs=-1, criterion='gini')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "bst2 = RandomForestClassifier(n_estimators=100, n_jobs=-1, criterion='entropy')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "bst3 = LGBMClassifier(\n",
    "        num_leaves = 7,\n",
    "        objective =  'binary',\n",
    "        max_depth = 7,\n",
    "        learning_rate = 0.1,\n",
    "        max_bin = 200,\n",
    "        n_estimators = 40)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "bst4 = XGBClassifier(\n",
    "        learning_rate =0.1,\n",
    "        n_estimators=30,  \n",
    "        max_depth=8,\n",
    "        min_child_weight=4,\n",
    "        gamma=0,\n",
    "        subsample=0.8,\n",
    "        colsample_bytree=0.9,\n",
    "        colsample_bylevel = 0.7,\n",
    "        objective= 'binary:logistic',\n",
    "        reg_alpha = 2,\n",
    "        reg_lambda = 0.1,\n",
    "        #n_jobs = -1,\n",
    "        nthread=-1,\n",
    "        seed=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "clfs = [ bst,bst2,bst3,bst4]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset_blend_train = np.zeros((X.shape[0], len(clfs)))\n",
    "dataset_blend_test = np.zeros((X_submission.shape[0], len(clfs)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "skf = list(sfolder.split(X.values, y.values.reshape(y.shape[0])))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
      "            max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
      "            min_impurity_decrease=0.0, min_impurity_split=None,\n",
      "            min_samples_leaf=1, min_samples_split=2,\n",
      "            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=-1,\n",
      "            oob_score=False, random_state=None, verbose=0,\n",
      "            warm_start=False)\n",
      "Fold0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/chychy/anaconda3/envs/py3/lib/python3.6/site-packages/ipykernel_launcher.py:13: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
      "  del sys.path[0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fold1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/chychy/anaconda3/envs/py3/lib/python3.6/site-packages/ipykernel_launcher.py:13: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
      "  del sys.path[0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fold2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/chychy/anaconda3/envs/py3/lib/python3.6/site-packages/ipykernel_launcher.py:13: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
      "  del sys.path[0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fold3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/chychy/anaconda3/envs/py3/lib/python3.6/site-packages/ipykernel_launcher.py:13: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
      "  del sys.path[0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fold4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/chychy/anaconda3/envs/py3/lib/python3.6/site-packages/ipykernel_launcher.py:13: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
      "  del sys.path[0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1 RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',\n",
      "            max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
      "            min_impurity_decrease=0.0, min_impurity_split=None,\n",
      "            min_samples_leaf=1, min_samples_split=2,\n",
      "            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=-1,\n",
      "            oob_score=False, random_state=None, verbose=0,\n",
      "            warm_start=False)\n",
      "Fold0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/chychy/anaconda3/envs/py3/lib/python3.6/site-packages/ipykernel_launcher.py:13: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
      "  del sys.path[0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fold1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/chychy/anaconda3/envs/py3/lib/python3.6/site-packages/ipykernel_launcher.py:13: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
      "  del sys.path[0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fold2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/chychy/anaconda3/envs/py3/lib/python3.6/site-packages/ipykernel_launcher.py:13: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
      "  del sys.path[0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fold3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/chychy/anaconda3/envs/py3/lib/python3.6/site-packages/ipykernel_launcher.py:13: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
      "  del sys.path[0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fold4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/chychy/anaconda3/envs/py3/lib/python3.6/site-packages/ipykernel_launcher.py:13: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
      "  del sys.path[0]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2 LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,\n",
      "        importance_type='split', learning_rate=0.1, max_bin=200,\n",
      "        max_depth=7, min_child_samples=20, min_child_weight=0.001,\n",
      "        min_split_gain=0.0, n_estimators=40, n_jobs=-1, num_leaves=7,\n",
      "        objective='binary', random_state=None, reg_alpha=0.0,\n",
      "        reg_lambda=0.0, silent=True, subsample=1.0,\n",
      "        subsample_for_bin=200000, subsample_freq=0)\n",
      "Fold0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/chychy/anaconda3/envs/py3/lib/python3.6/site-packages/sklearn/preprocessing/label.py:95: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/chychy/anaconda3/envs/py3/lib/python3.6/site-packages/sklearn/preprocessing/label.py:128: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fold1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/chychy/anaconda3/envs/py3/lib/python3.6/site-packages/sklearn/preprocessing/label.py:95: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/chychy/anaconda3/envs/py3/lib/python3.6/site-packages/sklearn/preprocessing/label.py:128: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fold2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/chychy/anaconda3/envs/py3/lib/python3.6/site-packages/sklearn/preprocessing/label.py:95: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/chychy/anaconda3/envs/py3/lib/python3.6/site-packages/sklearn/preprocessing/label.py:128: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fold3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/chychy/anaconda3/envs/py3/lib/python3.6/site-packages/sklearn/preprocessing/label.py:95: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/chychy/anaconda3/envs/py3/lib/python3.6/site-packages/sklearn/preprocessing/label.py:128: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fold4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/chychy/anaconda3/envs/py3/lib/python3.6/site-packages/sklearn/preprocessing/label.py:95: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/chychy/anaconda3/envs/py3/lib/python3.6/site-packages/sklearn/preprocessing/label.py:128: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3 XGBClassifier(base_score=0.5, colsample_bylevel=0.7, colsample_bytree=0.9,\n",
      "       gamma=0, learning_rate=0.1, max_delta_step=0, max_depth=8,\n",
      "       min_child_weight=4, missing=None, n_estimators=30, nthread=-1,\n",
      "       objective='binary:logistic', reg_alpha=2, reg_lambda=0.1,\n",
      "       scale_pos_weight=1, seed=3, silent=True, subsample=0.8)\n",
      "Fold0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/chychy/anaconda3/envs/py3/lib/python3.6/site-packages/sklearn/preprocessing/label.py:95: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/chychy/anaconda3/envs/py3/lib/python3.6/site-packages/sklearn/preprocessing/label.py:128: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fold1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/chychy/anaconda3/envs/py3/lib/python3.6/site-packages/sklearn/preprocessing/label.py:95: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/chychy/anaconda3/envs/py3/lib/python3.6/site-packages/sklearn/preprocessing/label.py:128: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fold2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/chychy/anaconda3/envs/py3/lib/python3.6/site-packages/sklearn/preprocessing/label.py:95: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/chychy/anaconda3/envs/py3/lib/python3.6/site-packages/sklearn/preprocessing/label.py:128: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fold3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/chychy/anaconda3/envs/py3/lib/python3.6/site-packages/sklearn/preprocessing/label.py:95: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/chychy/anaconda3/envs/py3/lib/python3.6/site-packages/sklearn/preprocessing/label.py:128: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fold4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/chychy/anaconda3/envs/py3/lib/python3.6/site-packages/sklearn/preprocessing/label.py:95: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/chychy/anaconda3/envs/py3/lib/python3.6/site-packages/sklearn/preprocessing/label.py:128: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    }
   ],
   "source": [
    "for j, clf in enumerate(clfs):\n",
    "    print(j, clf)\n",
    "    dataset_blend_test_j = np.zeros((X_submission.shape[0], len(skf)))\n",
    "    for i, (train, test) in enumerate(skf):\n",
    "        \n",
    "        print(\"Fold\" + str(i))\n",
    "        X_train = X.loc[train]\n",
    "        y_train = y.loc[train]\n",
    "        X_test = X.loc[test]\n",
    "        y_test = y.loc[test]\n",
    "        \n",
    "        \n",
    "        clf.fit(X_train, y_train)\n",
    "        y_submission = clf.predict_proba(X_test)[:, 1]\n",
    "        dataset_blend_train[test, j] = y_submission\n",
    "        \n",
    "        dataset_blend_test_j[:, i] = clf.predict_proba(X_submission)[:, 1]\n",
    "dataset_blend_test[:, j] = dataset_blend_test_j.mean(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/chychy/anaconda3/envs/py3/lib/python3.6/site-packages/sklearn/utils/validation.py:547: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    }
   ],
   "source": [
    "#clf = LogisticRegression(C=0.01)\n",
    "clf = LogisticRegression(C=0.1)\n",
    "clf.fit(dataset_blend_train, y)\n",
    "y_submission = clf.predict_proba(dataset_blend_test)[:, 1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0.15793902,  0.14111951,  0.14111951, ...,  0.46964549,\n",
       "        0.1021727 ,  0.13492453])"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_submission"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_result = pd.DataFrame(columns=[\"id\",\"click\"])\n",
    "test_result.id = test_id\n",
    "test_result.click = y_submission\n",
    "test_result.to_csv(\"test_predict_result2.csv\",index=None,encoding='utf-8')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
